# check data
# Load the matched replication data and derive the modeling variables.
d <- read_csv("/work/50114/MAG/data/modeling/psych_replication_matched.csv") %>%
  mutate(log_teamsize = log(n_authors), 
         condition_coded = ifelse(condition == "experiment", 1, 0),
         condition_fct = as_factor(condition), 
         teamsize_scaled = (n_authors-min(n_authors))/(max(n_authors)-min(n_authors)), # min-max scaling to [0, 1]
         days_after_2010_scaled = days_after_2010/max(days_after_2010), # no min subtraction needed because min = 0
         id_fct = as_factor(PaperId)) %>% # per-paper grouping factor
  glimpse()
## Rows: 744 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): condition
## dbl (5): match_group, n_authors, PaperId, days_after_2010, c_5
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 744
## Columns: 12
## $ match_group            <dbl> 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 8, 8, 9, 9, 10…
## $ condition              <chr> "experiment", "control", "experiment", "control…
## $ n_authors              <dbl> 5, 5, 1, 1, 11, 11, 2, 2, 2, 4, 4, 4, 4, 6, 6, …
## $ PaperId                <dbl> 2134174525, 1490919247, 2395494269, 2007726610,…
## $ days_after_2010        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 31, 31, …
## $ c_5                    <dbl> 0, 0, 0, 0, 10, 0, 6, 0, 6, 2, 133, 0, 28, 65, …
## $ log_teamsize           <dbl> 1.6094379, 1.6094379, 0.0000000, 0.0000000, 2.3…
## $ condition_coded        <dbl> 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,…
## $ condition_fct          <fct> experiment, control, experiment, control, exper…
## $ teamsize_scaled        <dbl> 0.08, 0.08, 0.00, 0.00, 0.20, 0.20, 0.02, 0.02,…
## $ days_after_2010_scaled <dbl> 0.000000000, 0.000000000, 0.000000000, 0.000000…
## $ id_fct                 <fct> 2134174525, 1490919247, 2395494269, 2007726610,…

Different ways of specifying a similar model. We had issues with f_team_0 earlier, so we are troubleshooting whether they are related to the intercept specification. The (0 + Intercept) syntax may actually be more appropriate here, since brms then places the prior on the actual intercept rather than on the intercept of the mean-centered design (which the default intercept parameterization assumes).

# Three equivalent mean structures that differ only in how the intercept enters:
# f_team_0:  no intercept -- one coefficient per condition level (class b only)
# f_team_1:  default intercept -- control estimated as an offset from the baseline
# f_team_01: explicit Intercept term via 0 + Intercept (treated as class b by brms)
f_team_0 <- bf(c_5 ~ 0 + condition_fct + condition_fct:teamsize_scaled + (1|id_fct))
f_team_1 <- bf(c_5 ~ 1 + condition_fct + condition_fct:teamsize_scaled + (1|id_fct))
f_team_01 <- bf(c_5 ~ 0 + Intercept + condition_fct + condition_fct:teamsize_scaled + (1|id_fct))

Just doing negbinomial() for now. We had Rhat issues for both the negative binomial and the zero-inflated negative binomial, so the choice of family does not seem to be the main cause of the problems.

Parameter classes per formula — f_team_0: b, sd, shape; f_team_1: b, Intercept, sd, shape; f_team_01: b, sd, shape (the Intercept is absorbed into class b).

set priors

# Prior for the no-intercept models (parameter classes: b, sd, shape).
negbin_0 <- c(prior(gamma(0.01, 0.01), class = shape), 
              prior(normal(0, 1), class = b),
              prior(normal(0, 1), class = sd)) # a wild guess

# Prior for the regular-intercept model: adds an Intercept class on top of
# b, sd, shape. (The old "zinegbin baseline" comment was stale -- this is the
# plain negbinomial prior for f_team_1 / f_team_match_1.)
negbin_1 <- c(prior(gamma(0.01, 0.01), class = shape), 
              prior(normal(0, 1), class = b),
              prior(normal(0, 1), class = Intercept),
              prior(normal(0, 1), class = sd)) # a wild guess

# Prior for the 0 + Intercept models: identical to negbin_0, because under the
# 0 + Intercept parameterization the intercept is treated as class b (so no
# separate Intercept prior exists). Can be reused across interaction models.
negbin_01 <- c(prior(gamma(0.01, 0.01), class = shape), 
               prior(normal(0, 1), class = b),
               prior(normal(0, 1), class = sd)) # a wild guess

sample prior only

Some warnings and divergences.

check priors

# Wrapper around pp_check: density overlay of `ndraws` predictive draws,
# truncated on the x-axis so the long citation tail does not dominate the plot.
prior_check <- function(model, ndraws, title, xmax) {
  check_plot <- pp_check(model, ndraws = ndraws)
  check_plot +
    labs(title = title) +
    theme_minimal() +
    xlim(0, xmax)
}
prior_check(negbin_prior_0, ndraws = 100, title = "No Intercept (x cutoff: 25)", xmax = 25)
## Warning: Removed 81 rows containing non-finite values (stat_density).
## Warning: Removed 52 rows containing non-finite values (stat_density).

prior_check(negbin_prior_1, ndraws = 100, title = "Intercept (x cutoff: 25)", xmax = 25)
## Warning: Removed 231 rows containing non-finite values (stat_density).

## Warning: Removed 52 rows containing non-finite values (stat_density).

prior_check(negbin_prior_01, ndraws = 100, title = "0 + Intercept (x cutoff: 25)", xmax = 25)
## Warning: Removed 54 rows containing non-finite values (stat_density).

## Warning: Removed 52 rows containing non-finite values (stat_density).

fit models

## no-intercept model (f_team_0); prior draws stored alongside the posterior
## NOTE(review): fit_model is a project wrapper (presumably around brms::brm
## with cached `file` results) -- chain/iter settings not visible here; confirm.
negbin_post_0 <- fit_model(
  family = negbinomial(), 
  formula = f_team_0,
  prior = negbin_0,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_0"
)

## regular-intercept model (f_team_1)
negbin_post_1 <- fit_model(
  family = negbinomial(), 
  formula = f_team_1,
  prior = negbin_1,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_1"
)

## 0 + Intercept model (f_team_01)
negbin_post_01 <- fit_model(
  family = negbinomial(), 
  formula = f_team_01,
  prior = negbin_01,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_01"
)

check traces

# trace and density plots, N = 3 parameters per page
# some auto-correlation; effect almost entirely absorbed by the random effect
plot(negbin_post_0, N = 3)

# same picture: some auto-correlation, effect mostly in the random effect
plot(negbin_post_1, N = 3)

# same issues for the 0 + Intercept specification
plot(negbin_post_01, N = 3)

Seems like the issue is not the particular specification. This post explains why a random effect with only one observation per level is problematic in a generalized linear mixed model: https://stats.stackexchange.com/questions/242821/how-will-random-effects-with-only-1-observation-affect-a-generalized-linear-mixe

Try with random effect per group (i.e. experiment + control)

create the variable (as factor)

# Group-level factor: one level per matched experiment/control pair.
d <- mutate(d, id_match = as_factor(match_group))

new model formulae

# Same three intercept specifications as before, but with the random intercept
# on the matched pair (id_match, ~2 observations per level) instead of the paper.
f_team_match_0 <- bf(c_5 ~ 0 + condition_fct + condition_fct:teamsize_scaled + (1|id_match))
f_team_match_1 <- bf(c_5 ~ 1 + condition_fct + condition_fct:teamsize_scaled + (1|id_match))
f_team_match_01 <- bf(c_5 ~ 0 + Intercept + condition_fct + condition_fct:teamsize_scaled + (1|id_match))

prior

## prior-only fit, no-intercept match model (sample_prior = "only" ignores the data)
negbin_prior_match_0 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_0,
  prior = negbin_0, # same prior 
  sample_prior = "only",
  file = "/work/50114/MAG/modeling/models/negbin_prior_match_0"
)

## prior-only fit, regular-intercept match model
negbin_prior_match_1 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_1,
  prior = negbin_1,
  sample_prior = "only",
  file = "/work/50114/MAG/modeling/models/negbin_prior_match_1"
)

## prior-only fit, 0 + Intercept match model
negbin_prior_match_01 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_01,
  prior = negbin_01,
  sample_prior = "only",
  file = "/work/50114/MAG/modeling/models/negbin_prior_match_01"
)

prior check

A few divergent transitions (for all of them, between 7-9/4000)

# NOTE(review): this section checks the priors of the *match* models, but the
# original code plotted the earlier per-paper prior fits (negbin_prior_0 etc.)
# -- almost certainly a copy-paste slip; point the checks at the match fits.
prior_check(negbin_prior_match_0, 100, "No Intercept (x cutoff: 25)", 25)
## Warning: Removed 95 rows containing non-finite values (stat_density).
## Warning: Removed 52 rows containing non-finite values (stat_density).

prior_check(negbin_prior_match_1, 100, "Intercept (x cutoff: 25)", 25)
## Warning: Removed 78 rows containing non-finite values (stat_density).

## Warning: Removed 52 rows containing non-finite values (stat_density).

prior_check(negbin_prior_match_01, 100, "0 + Intercept (x cutoff: 25)", 25)
## Warning: Removed 57 rows containing non-finite values (stat_density).

## Warning: Removed 52 rows containing non-finite values (stat_density).

fit models

a few pareto_k > 0.7 (but few compared to above).

## baseline 
negbin_post_match_0 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_0,
  prior = negbin_0,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_match_0"
)

## baseline 
negbin_post_match_1 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_1,
  prior = negbin_1,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_match_1"
)

## baseline 
negbin_post_match_01 <- fit_model(
  family = negbinomial(), 
  formula = f_team_match_01,
  prior = negbin_01,
  sample_prior = TRUE,
  file = "/work/50114/MAG/modeling/models/negbin_post_match_01"
)

traces

Looks MUCH better now than before.

# traces look pretty good now
plot(negbin_post_match_0, N = 3)

# same as above 
plot(negbin_post_match_1, N = 3)

# slightly different traces: might be more appropriate? 
plot(negbin_post_match_01, N = 3)

check estimates

More effective samples for intercept models. No Rhat issues, looks pretty good. Not strictly “significant” given 95% CI, but close – and also stronger effect of teamsize (although problematic because of outlier) – connected with the pareto k issue.

# estimates on the log (link) scale; check Rhat and ESS columns
print(negbin_post_match_0)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: c_5 ~ 0 + condition_fct + condition_fct:teamsize_scaled + (1 | id_match) 
##    Data: d (Number of observations: 744) 
##   Draws: 2 chains, each with iter = 2000; warmup = 0; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~id_match (Number of levels: 391) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.38      0.20     0.03     0.77 1.00      673     1497
## 
## Population-Level Effects: 
##                                         Estimate Est.Error l-95% CI u-95% CI
## condition_fctexperiment                     1.83      0.14     1.55     2.11
## condition_fctcontrol                        1.65      0.16     1.32     1.95
## condition_fctexperiment:teamsize_scaled     1.42      0.69     0.13     2.82
## condition_fctcontrol:teamsize_scaled        0.75      0.85    -0.92     2.43
##                                         Rhat Bulk_ESS Tail_ESS
## condition_fctexperiment                 1.00     2237     2728
## condition_fctcontrol                    1.00     1685     2664
## condition_fctexperiment:teamsize_scaled 1.00     6462     3092
## condition_fctcontrol:teamsize_scaled    1.00     7004     3074
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.22      0.02     0.19     0.26 1.00     1900     2477
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
print(negbin_post_match_1) # more effective samples than the no-intercept model
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: c_5 ~ 1 + condition_fct + condition_fct:teamsize_scaled + (1 | id_match) 
##    Data: d (Number of observations: 744) 
##   Draws: 2 chains, each with iter = 2000; warmup = 0; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~id_match (Number of levels: 391) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.38      0.19     0.03     0.75 1.00      776     1484
## 
## Population-Level Effects: 
##                                         Estimate Est.Error l-95% CI u-95% CI
## Intercept                                   1.85      0.14     1.57     2.12
## condition_fctcontrol                       -0.17      0.19    -0.55     0.19
## condition_fctexperiment:teamsize_scaled     1.35      0.69     0.07     2.79
## condition_fctcontrol:teamsize_scaled        0.64      0.87    -1.04     2.40
##                                         Rhat Bulk_ESS Tail_ESS
## Intercept                               1.00     2870     2950
## condition_fctcontrol                    1.00     5713     3120
## condition_fctexperiment:teamsize_scaled 1.00     7603     3332
## condition_fctcontrol:teamsize_scaled    1.00     6773     3107
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.22      0.02     0.19     0.26 1.00     2016     2681
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# 0 + Intercept specification; estimates very close to negbin_post_match_1
print(negbin_post_match_01) 
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: c_5 ~ 0 + Intercept + condition_fct + condition_fct:teamsize_scaled + (1 | id_match) 
##    Data: d (Number of observations: 744) 
##   Draws: 2 chains, each with iter = 2000; warmup = 0; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~id_match (Number of levels: 391) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.37      0.20     0.02     0.74 1.01      495     1296
## 
## Population-Level Effects: 
##                                         Estimate Est.Error l-95% CI u-95% CI
## Intercept                                   1.84      0.14     1.56     2.10
## condition_fctcontrol                       -0.14      0.19    -0.50     0.23
## condition_fctexperiment:teamsize_scaled     1.40      0.70     0.07     2.85
## condition_fctcontrol:teamsize_scaled        0.63      0.88    -1.07     2.39
##                                         Rhat Bulk_ESS Tail_ESS
## Intercept                               1.00     2135     2580
## condition_fctcontrol                    1.00     4000     2716
## condition_fctexperiment:teamsize_scaled 1.00     5289     3030
## condition_fctcontrol:teamsize_scaled    1.00     5187     2875
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.22      0.02     0.19     0.26 1.00     1378     1877
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot implications

y <- d$c_5 # observed 5-year citation counts

# Posterior predictive draws for each model.
# FIX: brms::posterior_predict takes `ndraws`, not `draws`; the original
# `draws = 500` was silently swallowed by `...`, so the full draw matrix was
# returned. `ndraws = 500` actually limits the number of draws.
y_0 <- posterior_predict(negbin_post_match_0, ndraws = 500)
y_1 <- posterior_predict(negbin_post_match_1, ndraws = 500)
y_01 <- posterior_predict(negbin_post_match_01, ndraws = 500)

no intercept

looks pretty good. a lot of uncertainty around 0 and 1 still.

# density overlay of observed vs 50 predictive draws, no-intercept model
ppc_dens_overlay(y, y_0[1:50, ]) + xlim(0, 50)
## Warning: Removed 1012 rows containing non-finite values (stat_density).
## Warning: Removed 23 rows containing non-finite values (stat_density).

# histograms for 5 individual draws; xlim cuts off the long right tail
ppc_hist(y, y_0[1:5, ]) + xlim(-1, 50)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 141 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing missing values (geom_bar).

regular intercept

Perhaps a bit worse (e.g. with the undershoot at around c_5 = 5).

# density overlay, regular-intercept model
ppc_dens_overlay(y, y_1[1:50, ]) + xlim(0, 50)
## Warning: Removed 1041 rows containing non-finite values (stat_density).
## Warning: Removed 23 rows containing non-finite values (stat_density).

# histograms for 5 individual draws
ppc_hist(y, y_1[1:5, ]) + xlim(-1, 50)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 109 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing missing values (geom_bar).

0 + Intercept

Looks more or less the same.

# density overlay, 0 + Intercept model
ppc_dens_overlay(y, y_01[1:50, ]) + xlim(0, 50)
## Warning: Removed 968 rows containing non-finite values (stat_density).
## Warning: Removed 23 rows containing non-finite values (stat_density).

# histograms for 5 individual draws
ppc_hist(y, y_01[1:5, ]) + xlim(-1, 50)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 155 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing missing values (geom_bar).

pareto k issues

https://bookdown.org/content/4857/monsters-and-mixtures.html

no intercept

mainly studies that are (relatively) low teamsize and high citation

# Observations the no-intercept model handles poorly (Pareto k > 0.7):
# attach the k diagnostic, keep the relevant columns, then filter.
d %>%
  mutate(k = negbin_post_match_0$criteria$loo$diagnostics$pareto_k) %>%
  select(c_5, teamsize_scaled, condition_fct, id_match, k) %>%
  filter(k > 0.7)
## # A tibble: 11 × 5
##      c_5 teamsize_scaled condition_fct id_match     k
##    <dbl>           <dbl> <fct>         <fct>    <dbl>
##  1    30            0.06 experiment    32       0.751
##  2     6            0.14 control       65       0.725
##  3     6            0.08 control       89       0.728
##  4    88            0.08 control       106      0.709
##  5    19            0.08 control       180      0.816
##  6    77            0.02 control       182      0.776
##  7    77            0.02 control       188      0.742
##  8    76            0.02 experiment    211      0.711
##  9    36            0.02 control       243      0.765
## 10    74            0.12 experiment    284      0.801
## 11    76            0.04 experiment    310      0.848

1 + …

some of the same here, mainly studies with high c_5 and low teamsize.

# Influential observations for the regular-intercept model (Pareto k > 0.7).
d %>%
  mutate(k = negbin_post_match_1$criteria$loo$diagnostics$pareto_k) %>%
  select(c_5, teamsize_scaled, condition_fct, id_match, k) %>%
  filter(k > 0.7)
## # A tibble: 10 × 5
##      c_5 teamsize_scaled condition_fct id_match     k
##    <dbl>           <dbl> <fct>         <fct>    <dbl>
##  1    10            0.2  experiment    3        0.722
##  2     5            0.02 control       135      0.739
##  3     7            0.06 experiment    158      0.727
##  4    77            0.02 control       184      0.755
##  5    27            0    experiment    194      0.750
##  6     6            0.06 control       200      0.756
##  7    20            0.04 control       212      0.722
##  8   129            1    experiment    222      0.873
##  9    84            0.2  control       283      0.756
## 10     5            0.02 control       324      0.765

0 + Intercept

Seems to handle influential observations much better. Good explanation for all of the outliers: (1) the two control studies are high influence because they are the same (fix earlier in pipeline) (2) the experiment study is high influence because there are very few studies with high teamsize (could do log-transformation) and the corresponding control study has zero citations… so this has an outsize influence on the interaction with condition and teamsize.

# Influential observations for the 0 + Intercept model. The two control
# studies with c_5 = 77 are duplicates (to be fixed earlier in the pipeline);
# the experiment study is the teamsize maximum with extremely high citations,
# whereas its matched control has zero citations.
d %>%
  mutate(k = negbin_post_match_01$criteria$loo$diagnostics$pareto_k) %>%
  select(c_5, teamsize_scaled, condition_fct, id_match, k) %>%
  filter(k > 0.7)
## # A tibble: 3 × 5
##     c_5 teamsize_scaled condition_fct id_match     k
##   <dbl>           <dbl> <fct>         <fct>    <dbl>
## 1    77            0.02 control       184      0.753
## 2    77            0.02 control       185      0.770
## 3   129            1    experiment    222      0.720

Quick model comparison

Basically no difference, but appears to prefer the intercept models. Do we know why that is?

# LOO comparison: higher elpd is better; differences are tiny relative to se_diff
loo_compare(negbin_post_match_0,
            negbin_post_match_1,
            negbin_post_match_01)
##                      elpd_diff se_diff
## negbin_post_match_1   0.0       0.0   
## negbin_post_match_01 -0.2       0.3   
## negbin_post_match_0  -0.7       0.4
# stacking weights; NB: some Pareto k values are too high, so weights are approximate
loo_model_weights(negbin_post_match_0,
                  negbin_post_match_1,
                  negbin_post_match_01)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.

## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.

## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## Method: stacking
## ------
##                      weight
## negbin_post_match_0  0.000 
## negbin_post_match_1  0.581 
## negbin_post_match_01 0.419